package com.shujia.spark

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.rdd.RDD

/**
  * Demonstrates the RDD `groupBy` operator by counting how many lines of a
  * comma-separated text file share the same value in their fifth field.
  */
object Demo8GroupBy {

  /**
    * Reads `spark/data/students.txt`, groups its lines by the fifth
    * comma-separated field (presumably the class name, judging by the
    * original local name `clazz` -- confirm against the data file) and
    * prints one `(key, count)` pair per group.
    *
    * @param args command-line arguments; not used
    */
  def main(args: Array[String]): Unit = {

    val conf: SparkConf = new SparkConf().setMaster("local[4]").setAppName("map")
    val sc: SparkContext = new SparkContext(conf)

    try {
      val rdd1: RDD[String] = sc.textFile("spark/data/students.txt")

      // groupBy takes a function that extracts the grouping key from each
      // element and yields (key, all elements with that key) pairs.
      // Note: a line with fewer than five comma-separated fields makes
      // `split(",")(4)` throw ArrayIndexOutOfBoundsException.
      rdd1
        .groupBy(line => line.split(",")(4))
        .map { case (clazz, lines) =>
          // The group already contains every matching line, so its size is
          // the count; no need to map each element to 1 and sum.
          val sum: Int = lines.size

          (clazz, sum)
        }
        .foreach(println)
    } finally {
      // Always release the SparkContext, even when the job fails.
      sc.stop()
    }

  }

}
